****************************************************************
* Inverse Probability Weighting 
****************************************************************

* Dataset setup
* Loads the individual panel, removes surplus rows, merges the balance-sample
* flag and builds the wave-1 covariate list ($vars) used by the attrition
* probits below.
use "$data\4_individual_ano_vars.dta", clear

drop if id_indiv == .

* Within each individual (rows in wave order) discard the fourth row
bysort id_indiv (wave): drop if _n == 4

* Flag (id_indiv, wave) pairs that appear twice, then remove the third row of
* individual 756306 when that row belongs to such a pair
duplicates tag id_indiv wave, gen(dup)
bysort id_indiv (wave): drop if id_indiv == 756306 & _n == 3 & dup == 1
drop dup

merge 1:1 id_indiv wave using "$data\4_data_for_balance.dta", keepusing(used_w3_w1) nogen
set seed 12345
set level 90

rename assignment ITT

* Individual-level flag: maximum of used_w3_w1 over all rows of the individual
bysort id_indiv: egen keep = max(used_w3_w1)

local balvars wrcon sosec mwage_mio aminwage empquality_ind hoursworked satisfied training_any

* Wave-1 copies of the outcome variables (missing in every other wave);
* the list of new names is collected in balvars_baseline as we go
local balvars_baseline
foreach v of varlist `balvars' {
    gen `v'_baseline = `v' if wave == 1
    local balvars_baseline `balvars_baseline' `v'_baseline
}

* For each baseline variable:
*   <name>_std : z-score computed over wave-1 rows, set to 0 where the
*                baseline value is missing (defined only for wave 1)
*   mis_<name> : 1 for wave-1 rows with a missing baseline value, 0 on every
*                other row (including rows outside wave 1)
local std_list
local mis_list
foreach b of varlist `balvars_baseline' {
    egen `b'_std = std(`b') if wave == 1
    replace `b'_std = 0 if wave == 1 & missing(`b')
    gen mis_`b' = missing(`b') & wave == 1
    local std_list `std_list' `b'_std
    local mis_list `mis_list' mis_`b'
}

* Covariates of the attrition probits: baseline characteristics, then the
* standardized baseline outcomes, then their missing-value indicators
global vars age_base  male_base  married_cohabiting_base  educ_none_base educ_prim_base educ_sec_base educ_tert_base ///
    supervisor_base  supervised_base  experience_sector_base  tenure_years_base ///
    `std_list' ///
    `mis_list'

* Attrition - Wave 2
* For each outcome:
*   1. run the unweighted wave-2 ITT regression to find who enters the sample,
*   2. model that inclusion with a probit on the wave-1 covariates in $vars,
*   3. derive stabilized and normalized inverse probability weights,
*   4. re-estimate the regression with the (unnormalized) weights as pweights.
* $controls_balance and $se_indiv are defined outside this section.
local c = 1
preserve

foreach y of varlist aminwage wrcon sosec empquality_ind  lwage {
    * Replace the outcome with the version stored in the regression file and
    * bring in its 2021 value plus the matching missing-value flag
    drop `y'
    merge m:1 id_indiv wave using "$data\4_individual_ano_reg_1.dta", keepusing(`y'_2021 missing_`y'_2021 `y') nogen

    replace status = 1 if status == 4

    * Unweighted specification; e(sample) identifies the rows that are used
    areg `y' ITT `y'_2021 $controls_balance missing_`y'_2021 i.wave if wave == 2 $se_indiv
    predict resid_`y', residuals
    gen inc_in_reg_`y' = e(sample)

    * Stayer flag spread over all rows of the individual; wave-1 employer rows
    * that are not in the regression are blanked so the probit skips them
    bysort id_indiv: egen sel_wave_2_`y' = max(inc_in_reg_`y')
    replace sel_wave_2_`y' = . if sel_wave_2_`y' == 0 & employer == 1 & wave == 1

    * Step 1: probability of staying in the sample, fitted on wave-1 rows
    probit sel_wave_2_`y' $vars if wave == 1
    predict ps_e_stay_`y' if e(sample), pr

    *** Stabilized weights ***
    * Step 2: overall stay / leave shares within the probit sample
    summarize sel_wave_2_`y' if e(sample)
    scalar p_stay = r(mean)
    scalar p_leave = 1 - p_stay

    * Step 3: marginal share divided by the predicted probability of the
    * realised status (left missing where the stayer flag is missing)
    gen ew_stab_`y' = cond(sel_wave_2_`y' == 1, p_stay / ps_e_stay_`y', p_leave / (1 - ps_e_stay_`y')) if inlist(sel_wave_2_`y', 0, 1)

    * Copy the weight to every row of the individual
    by id_indiv: egen ew_stab_max_`y' = max(ew_stab_`y')
    summarize ew_stab_max_`y' if wave == 2, detail

    * Stayers without a computed weight receive weight 1
    replace ew_stab_max_`y' = 1 if ew_stab_max_`y' == . & sel_wave_2_`y' == 1 & wave == 2

    label variable ew_stab_max_`y' "Stabilized IPW for attrition"

    *** Normalized weights ***
    * Plain inverse of the predicted probability of the realised status
    gen ew_`y' = cond(sel_wave_2_`y' == 1, 1 / ps_e_stay_`y', 1 / (1 - ps_e_stay_`y')) if inlist(sel_wave_2_`y', 0, 1)
    by id_indiv: egen ew_max_`y' = max(ew_`y')

    * Divide by the mean taken over all rows; the result is kept for wave 2 only
    summarize ew_max_`y'
    gen ew_max_`y'_std = ew_max_`y' / r(mean) if wave == 2
    summarize ew_max_`y'_std, detail

    * Regression rows without a computed weight receive weight 1
    replace ew_max_`y' = 1 if ew_max_`y' == . & inc_in_reg_`y' == 1 & wave == 2
    replace ew_max_`y'_std = 1 if ew_max_`y'_std == . & inc_in_reg_`y' == 1 & wave == 2

    label variable ew_max_`y'_std "Normalized IPW for attrition"

    * Distribution of the three weight variants
    summarize ew_stab_max_`y', detail
    summarize ew_max_`y'_std, detail
    summarize ew_max_`y', detail

    *** Re-run the regression with inverse probability weights
    eststo m`c': areg `y' ITT `y'_2021 $controls_balance missing_`y'_2021 i.wave [pw=ew_max_`y'] if wave == 2 $se_indiv

    * Mean of the outcome among ITT == 0 rows, attached to the stored estimates
    quietly summarize `y' if ITT == 0
    local mu : display %5.2f r(mean)
    estadd local mu `mu'

    local c = `c' + 1
}
restore

* Attrition - Wave 3
* Same procedure as the wave-2 section, applied to wave 3 and to the
* regression file 4_individual_ano_reg_2.dta; estimates are stored as n1-n5.
* $controls_balance and $se_indiv are defined outside this section.
local c = 1
preserve

foreach y of varlist aminwage wrcon sosec empquality_ind lwage  {
    * Replace the outcome with the version stored in the regression file and
    * bring in its 2021 value plus the matching missing-value flag
    drop `y'
    merge m:1 id_indiv wave using "$data\4_individual_ano_reg_2.dta", keepusing(`y'_2021 missing_`y'_2021 `y') nogen

    replace status = 1 if status == 4

    * Unweighted specification; e(sample) identifies the rows that are used
    areg `y' ITT `y'_2021 $controls_balance missing_`y'_2021 i.wave if wave == 3 $se_indiv
    predict resid_`y', residuals
    gen inc_in_reg_`y' = e(sample)

    * Stayer flag spread over all rows of the individual; wave-1 employer rows
    * that are not in the regression are blanked so the probit skips them
    bysort id_indiv: egen sel_wave_3_`y' = max(inc_in_reg_`y')
    replace sel_wave_3_`y' = . if sel_wave_3_`y' == 0 & employer == 1 & wave == 1

    * Step 1: probability of staying in the sample, fitted on wave-1 rows
    probit sel_wave_3_`y' $vars if wave == 1
    predict ps_e_stay_`y' if e(sample), pr

    *** Stabilized weights ***
    * Step 2: overall stay / leave shares within the probit sample
    summarize sel_wave_3_`y' if e(sample)
    scalar p_stay = r(mean)
    scalar p_leave = 1 - p_stay

    * Step 3: marginal share divided by the predicted probability of the
    * realised status (left missing where the stayer flag is missing)
    gen ew_stab_`y' = cond(sel_wave_3_`y' == 1, p_stay / ps_e_stay_`y', p_leave / (1 - ps_e_stay_`y')) if inlist(sel_wave_3_`y', 0, 1)

    * Copy the weight to every row of the individual
    by id_indiv: egen ew_stab_max_`y' = max(ew_stab_`y')
    summarize ew_stab_max_`y' if wave == 3, detail

    * Stayers without a computed weight receive weight 1
    replace ew_stab_max_`y' = 1 if ew_stab_max_`y' == . & sel_wave_3_`y' == 1 & wave == 3

    label variable ew_stab_max_`y' "Stabilized IPW for attrition, endline"

    *** Normalized weights ***
    * Plain inverse of the predicted probability of the realised status
    gen ew_`y' = cond(sel_wave_3_`y' == 1, 1 / ps_e_stay_`y', 1 / (1 - ps_e_stay_`y')) if inlist(sel_wave_3_`y', 0, 1)
    by id_indiv: egen ew_max_`y' = max(ew_`y')

    * Divide by the mean taken over all rows; the result is kept for wave 3 only
    summarize ew_max_`y'
    gen ew_max_`y'_std = ew_max_`y' / r(mean) if wave == 3
    summarize ew_max_`y'_std, detail

    * Regression rows without a computed weight receive weight 1
    replace ew_max_`y' = 1 if ew_max_`y' == . & inc_in_reg_`y' == 1 & wave == 3
    replace ew_max_`y'_std = 1 if ew_max_`y'_std == . & inc_in_reg_`y' == 1 & wave == 3

    label variable ew_max_`y'_std "Normalized IPW for attrition"

    * Distribution of the stabilized and normalized weights
    summarize ew_stab_max_`y', detail
    summarize ew_max_`y'_std, detail

    *** Re-run the regression with inverse probability weights
    eststo n`c': areg `y' ITT `y'_2021 $controls_balance missing_`y'_2021 i.wave [pw=ew_max_`y'] if wave == 3 $se_indiv

    * Marker for the rows used by the weighted regression
    gen ipw_`y' = e(sample)

    * Mean of the outcome among ITT == 0 rows, attached to the stored estimates
    quietly summarize `y' if ITT == 0
    local mu : display %5.2f r(mean)
    estadd local mu `mu'

    local c = `c' + 1
}
restore

* Attrition - Pooled Sample
* For each outcome, wave-specific attrition weights are built separately for
* wave 2 and wave 3 (suffixes _w2 / _w3) and then combined so that every
* observation carries the weight of its own wave. The pooled regression over
* waves 2 and 3 is then re-estimated with those weights (stored as l1-l5).
* $controls_balance and $se_indiv are defined outside this section.
*
* Fixes relative to the previous version:
*   - the stabilized "leaver" weights use p_leave_w2 / p_leave_w3 instead of
*     the scalar p_leave left over from the single-wave sections;
*   - the wave-3 stayer flag is named sel_wave_3_<outcome> throughout (it was
*     created with an extra _w3 suffix and only resolved through variable
*     abbreviation);
*   - the wave-1 employer exclusion is applied to the wave-3 flag, not to the
*     wave-2 flag;
*   - the unnormalized weight used in the regression is assigned by wave, so
*     wave-2 rows are no longer overwritten with wave-3 weights.
local c = 1 
preserve

foreach o of varlist aminwage wrcon sosec empquality_ind lwage  {
    * Replace the outcome with the version stored in the regression file and
    * bring in its 2021 value plus the matching missing-value flag
    drop  `o'
    merge m:1 id_indiv wave using "$data\4_individual_ano_reg_3.dta", keepusing(`o'_2021 missing_`o'_2021 `o') nogen

    replace status = 1 if status == 4

    * ------------------------------------------------------------------
    * Wave 2 weights
    * ------------------------------------------------------------------
    areg `o' ITT `o'_2021 $controls_balance missing_`o'_2021 i.wave if wave == 2 $se_indiv

    predict resid_`o'_w2, residuals
    gen inc_in_reg_`o'_w2 = e(sample)

    * Stayer flag on every row of the individual; wave-1 employer rows that
    * are not in the regression are blanked so the probit skips them
    bys id_indiv: egen sel_wave_2_`o' = max(inc_in_reg_`o'_w2)
    replace sel_wave_2_`o' = . if wave == 1 & employer == 1 & sel_wave_2_`o' == 0

    * Step 1: Estimate probability of staying in the sample
    probit sel_wave_2_`o' $vars if wave == 1
    predict ps_e_stay_`o'_w2 if e(sample)

    *** Stabilized weights ***
    * Step 2: Calculate overall probabilities
    sum sel_wave_2_`o' if e(sample)
    scalar p_stay_w2 = r(mean)
    scalar p_leave_w2 = 1 - p_stay_w2

    * Step 3: Generate stabilized weights
    gen ew_stab_`o'_w2 = .
    replace ew_stab_`o'_w2 = p_stay_w2 / ps_e_stay_`o'_w2 if sel_wave_2_`o' == 1
    replace ew_stab_`o'_w2 = p_leave_w2 / (1 - ps_e_stay_`o'_w2) if sel_wave_2_`o' == 0

    by id_indiv: egen ew_stab_max_`o'_w2 = max(ew_stab_`o'_w2)
    sum ew_stab_max_`o'_w2 if wave == 2, d

    *** Normalized weights ***
    gen ew_`o'_w2 = 1 / ps_e_stay_`o'_w2 if sel_wave_2_`o' == 1
    replace ew_`o'_w2 = 1 / (1 - ps_e_stay_`o'_w2) if sel_wave_2_`o' == 0
    by id_indiv: egen ew_max_`o'_w2 = max(ew_`o'_w2)

    * Divide by the mean over all rows; the result is kept for wave 2 only
    sum ew_max_`o'_w2
    gen ew_max_`o'_std_w2 = ew_max_`o'_w2 / r(mean) if wave == 2
    sum ew_max_`o'_std_w2, d

    * Regression rows without a computed weight receive weight 1
    replace ew_max_`o'_w2 = 1 if wave == 2 & inc_in_reg_`o'_w2 == 1 & ew_max_`o'_w2 == .
    replace ew_max_`o'_std_w2 = 1 if wave == 2 & inc_in_reg_`o'_w2 == 1 & ew_max_`o'_std_w2 == .

    * ------------------------------------------------------------------
    * Wave 3 weights
    * ------------------------------------------------------------------
    areg `o' ITT `o'_2021 $controls_balance missing_`o'_2021 i.wave if wave == 3 $se_indiv
    predict resid_`o'_w3, residuals
    gen inc_in_reg_`o'_w3 = e(sample)

    * Same construction as for wave 2, applied to the wave-3 flag
    bys id_indiv: egen sel_wave_3_`o' = max(inc_in_reg_`o'_w3)
    replace sel_wave_3_`o' = . if wave == 1 & employer == 1 & sel_wave_3_`o' == 0

    * Step 1: Estimate probability of staying in the sample
    probit sel_wave_3_`o' $vars if wave == 1
    predict ps_e_stay_`o'_w3 if e(sample)

    *** Stabilized weights ***
    * Step 2: Calculate overall probabilities
    sum sel_wave_3_`o' if e(sample)
    scalar p_stay_w3 = r(mean)
    scalar p_leave_w3 = 1 - p_stay_w3

    * Step 3: Generate stabilized weights
    gen ew_stab_`o'_w3 = .
    replace ew_stab_`o'_w3 = p_stay_w3 / ps_e_stay_`o'_w3 if sel_wave_3_`o' == 1
    replace ew_stab_`o'_w3 = p_leave_w3 / (1 - ps_e_stay_`o'_w3) if sel_wave_3_`o' == 0

    by id_indiv: egen ew_stab_max_`o'_w3 = max(ew_stab_`o'_w3)
    sum ew_stab_max_`o'_w3 if wave == 3, d

    *** Normalized weights ***
    gen ew_`o'_w3 = 1 / ps_e_stay_`o'_w3 if sel_wave_3_`o' == 1
    replace ew_`o'_w3 = 1 / (1 - ps_e_stay_`o'_w3) if sel_wave_3_`o' == 0
    by id_indiv: egen ew_max_`o'_w3 = max(ew_`o'_w3)

    * Divide by the mean over all rows; the result is kept for wave 3 only
    sum ew_max_`o'_w3
    gen ew_max_`o'_std_w3 = ew_max_`o'_w3 / r(mean) if wave == 3
    sum ew_max_`o'_std_w3, d

    * Regression rows without a computed weight receive weight 1
    replace ew_max_`o'_w3 = 1 if wave == 3 & inc_in_reg_`o'_w3 == 1 & ew_max_`o'_w3 == .
    replace ew_max_`o'_std_w3 = 1 if wave == 3 & inc_in_reg_`o'_w3 == 1 & ew_max_`o'_std_w3 == .

    * ------------------------------------------------------------------
    * Combine: each row takes the weight belonging to its own wave
    * ------------------------------------------------------------------
    *** Stabilized weights for both waves
    gen     ew_stab_`o' = ew_stab_max_`o'_w2 if wave == 2
    replace ew_stab_`o' = ew_stab_max_`o'_w3 if wave == 3
    replace ew_stab_`o' = 1 if wave == 2 & sel_wave_2_`o' == 1 & ew_stab_`o' == .
    replace ew_stab_`o' = 1 if wave == 3 & sel_wave_3_`o' == 1 & ew_stab_`o' == .

    *** Normalized weights for both waves
    * The _std_w2 / _std_w3 variables are only filled for their own wave
    gen     ew_max_`o'_std = ew_max_`o'_std_w2 if wave == 2
    replace ew_max_`o'_std = ew_max_`o'_std_w3 if wave == 3

    *** Classic weights for both waves
    * ew_max_*_w2 and ew_max_*_w3 are individual-level maxima and therefore
    * non-missing on every row of an individual, so they must be selected by
    * wave rather than by non-missingness
    gen     ew_max_`o' = ew_max_`o'_w2 if wave == 2
    replace ew_max_`o' = ew_max_`o'_w3 if wave == 3

    *** Rerun regression
    eststo l`c': areg `o' ITT `o'_2021 $controls_balance missing_`o'_2021 i.wave [pw=ew_max_`o'] if inlist(wave, 2, 3) $se_indiv

    * Mean of the outcome among ITT == 0 rows, attached to the stored estimates
    quietly summarize `o' if ITT == 0
    local mu : display %5.2f r(mean)
    estadd local mu `mu'

    local ++c
}
restore

* Export ITT results
* Writes the LaTeX table in two passes to the same file:
*   pass 1 (replace): table preamble, column headers, ITT coefficient with
*                     standard error, R2 and the "mu" mean added via estadd;
*   pass 2 (append) : the row with the number of observations and the
*                     closing of the tabular environment.
* Column order is outcome by outcome: m# (wave 2, "6 M."), n# (wave 3,
* "18 M."), l# (pooled). With 15 models the tabular needs one label column
* plus 15 data columns, i.e. l*{15}{c}, and the panel title spans all 16.

esttab m1 n1 l1 m2 n2 l2 m3 n3 l3 m4 n4 l4 m5 n5 l5  using "$results\01_tables\Table_S18_robust_inverse_pw.tex", ///
    keep(ITT) b(%5.2f) se(%5.2f) ///
    star(* 0.10 ** 0.05 *** 0.01) ///
    scalars("r2 R2" "mu Mean") ///
    nomtitles noobs noline nonotes nonumbers ///
    prehead("\begin{tabular}{l*{15}{c}}\hline\hline\\[2pt]\multicolumn{16}{l}{\textbf{Panel A: Main Outcomes}}\\[2pt]\hline\\[2pt]&\multicolumn{3}{c}{Min. Wage (0/1)}&\multicolumn{3}{c}{Written Contract (0/1)}&\multicolumn{3}{c}{Social Security (0/1)}&\multicolumn{3}{c}{Formality Index (0-1)}&\multicolumn{3}{c}{Wage (Log.)}\\\cmidrule(lr){2-4}\cmidrule(lr){5-7}\cmidrule(lr){8-10}\cmidrule(lr){11-13}\cmidrule(lr){14-16}\\[2pt]") ///
    posthead("&\multicolumn{1}{c}{6 M.}&\multicolumn{1}{c}{18 M.}&\multicolumn{1}{c}{Pooled}&\multicolumn{1}{c}{6 M.}&\multicolumn{1}{c}{18 M.}&\multicolumn{1}{c}{Pooled}&\multicolumn{1}{c}{6 M.}&\multicolumn{1}{c}{18 M.}&\multicolumn{1}{c}{Pooled}&\multicolumn{1}{c}{6 M.}&\multicolumn{1}{c}{18 M.}&\multicolumn{1}{c}{Pooled}&\multicolumn{1}{c}{6 M.}&\multicolumn{1}{c}{18 M.}&\multicolumn{1}{c}{Pooled}\\[2pt]\hline\\") ///
    prefoot("") postfoot("\\") replace

* Second pass: observation counts and table footer, appended to the same file
esttab m1 n1 l1 m2 n2 l2 m3 n3 l3 m4 n4 l4 m5 n5 l5 using "$results\01_tables\Table_S18_robust_inverse_pw.tex", ///
    drop(*) stats(N, fmt(%9.0f) labels("N")) ///
    nomtitles noline nonotes nonumbers ///
    prehead("") posthead("") prefoot("") ///
    postfoot("\hline\hline\\\multicolumn{9}{l}{\footnotesize * p<0.10, ** p<0.05, *** p<0.01}\end{tabular}") ///
    append
	
*** Print to Stata
* Shows the same 15 stored models in the results window (no file is written).
* Models are listed outcome by outcome: m# / n# / l# are the estimates titled
* "6 M.", "18 M." and "Pooled" respectively.
local ipw_models m1 n1 l1 m2 n2 l2 m3 n3 l3 m4 n4 l4 m5 n5 l5

esttab `ipw_models', ///
    title("Panel A: Main Outcomes") ///
    mgroups("Min. Wage (0/1)" "Written Contract (0/1)" "Social Security (0/1)" "Formality Index (0-1)" "Wage (Log.)", ///
        pattern(1 0 0 1 0 0 1 0 0 1 0 0 1 0 0) ///
        prefix(\multicolumn{@span}{c}{) suffix(}) ///
        span erepeat(\cmidrule(lr){@span})) ///
    mtitles("6 M." "18 M." "Pooled" "6 M." "18 M." "Pooled" "6 M." "18 M." "Pooled" "6 M." "18 M." "Pooled" "6 M." "18 M." "Pooled") ///
    keep(ITT) ///
    b(%5.2f) se(%5.2f) ///
    star(* 0.10 ** 0.05 *** 0.01) ///
    scalars("r2 R2" "mu Mean") ///
    substitute(_ \_) ///
    addnotes("* p<0.10, ** p<0.05, *** p<0.01")
